// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stddef.h>
#include <stdint.h>

#include <xnnpack/common.h>
#include <xnnpack/microparams.h>

#ifdef __cplusplus
extern "C" {
#endif


// Emits the prototype of one f16 vabs micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_abs_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VABS_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_abs_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vabs micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): the trailing _uN presumably encodes the unroll width - confirm.

// Tag: neonfp16arith
DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__neonfp16arith_u8)
DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__neonfp16arith_u16)

// Tag: sse2
DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__sse2_u8)
DECLARE_F16_VABS_UKERNEL_FUNCTION(xnn_f16_vabs_ukernel__sse2_u16)


// Emits the prototype of one f16 vclamp micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_minmax_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vclamp micro-kernel prototypes, grouped by the implementation tag
// embedded in each name.
// NOTE(review): the trailing _uN / _uNv presumably encodes the unroll width
// (elements or vector registers) - confirm.

// Tag: neonfp16arith
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__neonfp16arith_u8)
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__neonfp16arith_u16)

// Tag: rvvfp16arith
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__rvvfp16arith_u1v)
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__rvvfp16arith_u2v)
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__rvvfp16arith_u4v)
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__rvvfp16arith_u8v)

// Tag: f16c
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__f16c_u8)
DECLARE_F16_VCLAMP_UKERNEL_FUNCTION(xnn_f16_vclamp_ukernel__f16c_u16)


// Emits the prototype of one f16 velu micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_elu_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VELU_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_elu_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 velu micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): "rr1_p3" presumably names the approximation scheme and _uN the
// unroll width - confirm against the kernel sources.

// Tag: neonfp16arith_rr1_p3
DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_u8)
DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_u16)

// Tag: avx2_rr1_p3
DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__avx2_rr1_p3_u8)
DECLARE_F16_VELU_UKERNEL_FUNCTION(xnn_f16_velu_ukernel__avx2_rr1_p3_u16)


// Emits the prototype of one f16 vhswish micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_hswish_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_hswish_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vhswish micro-kernel prototypes, grouped by the implementation tag
// embedded in each name.
// NOTE(review): the trailing _uN presumably encodes the unroll width - confirm.

// Tag: neonfp16arith
DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__neonfp16arith_u8)
DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__neonfp16arith_u16)

// Tag: f16c
DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__f16c_u8)
DECLARE_F16_VHSWISH_UKERNEL_FUNCTION(xnn_f16_vhswish_ukernel__f16c_u16)


// Emits the prototype of one f16 vneg micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_neg_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VNEG_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_neg_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);


// f16 vneg micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): the trailing _uN presumably encodes the unroll width - confirm.

// Tag: neonfp16arith
DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__neonfp16arith_u8)
DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__neonfp16arith_u16)

// Tag: sse2
DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_u8)
DECLARE_F16_VNEG_UKERNEL_FUNCTION(xnn_f16_vneg_ukernel__sse2_u16)


// Emits the prototype of one f16 rounding micro-kernel; the same macro is
// shared by the vrndne, vrndz, vrndu and vrndd kernel families.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_rnd_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VRND_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_rnd_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 rounding micro-kernel prototypes, one group per rounding family; each
// family has f16c and neonfp16arith variants.
// NOTE(review): the family names presumably map to rounding modes (ne = nearest
// even, z = toward zero, u = up, d = down) and _uN to the unroll width -
// confirm against the kernel sources.

// Family: vrndne
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__f16c_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__f16c_u16)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__neonfp16arith_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndne_ukernel__neonfp16arith_u16)

// Family: vrndz
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__f16c_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__f16c_u16)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__neonfp16arith_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndz_ukernel__neonfp16arith_u16)

// Family: vrndu
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__f16c_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__f16c_u16)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__neonfp16arith_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndu_ukernel__neonfp16arith_u16)

// Family: vrndd
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__f16c_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__f16c_u16)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__neonfp16arith_u8)
DECLARE_F16_VRND_UKERNEL_FUNCTION(xnn_f16_vrndd_ukernel__neonfp16arith_u16)


// Emits the prototype of one f16 vsigmoid micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t batch, const void* input, void* output, params), where params is an
// array parameter of union xnn_f16_sigmoid_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): the unit of batch (elements vs. bytes) is not visible in this
// header - confirm against the kernel sources.
#define DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t batch, const void* input, void* output, \
      const union xnn_f16_sigmoid_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vsigmoid micro-kernel prototypes, grouped by the implementation tag
// embedded in each name; every group covers _u8 through _u64 in steps of 8.
// NOTE(review): tag fragments such as rr1/rr2, p2, div, rcp, nr1fma and
// nr1recps presumably name the approximation/division strategy, and _uN the
// unroll width - confirm against the kernel sources.

// Tag: avx2_rr1_p2_div
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u8)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u16)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u24)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u32)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u40)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u48)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u56)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_u64)

// Tag: avx2_rr1_p2_rcp
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u8)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u16)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u24)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u32)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u40)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u48)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u56)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_u64)

// Tag: aarch64_neonfp16arith_rr2_p2_div
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u8)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u16)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u24)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u32)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u40)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u48)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u56)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__aarch64_neonfp16arith_rr2_p2_div_u64)

// Tag: neonfp16arith_rr2_p2_nr1fma
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u8)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u16)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u24)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u32)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u40)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u48)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u56)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_u64)

// Tag: neonfp16arith_rr2_p2_nr1recps
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u8)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u16)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u24)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u32)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u40)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u48)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u56)
DECLARE_F16_VSIGMOID_UKERNEL_FUNCTION(xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_u64)


// Emits the prototype of one f16 vsqr micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_default_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VSQR_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_default_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vsqr micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): the trailing _uN presumably encodes the unroll width - confirm.

// Tag: neonfp16arith
DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__neonfp16arith_u8)
DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__neonfp16arith_u16)

// Tag: f16c
DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_u8)
DECLARE_F16_VSQR_UKERNEL_FUNCTION(xnn_f16_vsqr_ukernel__f16c_u16)


// Emits the prototype of one f16 vsqrt micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_sqrt_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VSQRT_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_sqrt_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vsqrt micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): tag fragments such as sqrt, rsqrt and nr1fma1adj presumably
// name the computation strategy, and _uN the unroll width - confirm against
// the kernel sources.

// Tag: fp16arith_sqrt
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__fp16arith_sqrt_u1)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__fp16arith_sqrt_u2)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__fp16arith_sqrt_u4)

// Tag: neonfp16arith_nr1fma1adj
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__neonfp16arith_nr1fma1adj_u8)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__neonfp16arith_nr1fma1adj_u16)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__neonfp16arith_nr1fma1adj_u32)

// Tag: aarch64_neonfp16arith_sqrt
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__aarch64_neonfp16arith_sqrt_u8)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__aarch64_neonfp16arith_sqrt_u16)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__aarch64_neonfp16arith_sqrt_u32)

// Tag: f16c_sqrt
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_sqrt_u8)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_sqrt_u16)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_sqrt_u32)

// Tag: f16c_rsqrt
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_rsqrt_u8)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_rsqrt_u16)
DECLARE_F16_VSQRT_UKERNEL_FUNCTION(xnn_f16_vsqrt_ukernel__f16c_rsqrt_u32)


// Emits the prototype of one f16 vtanh micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const void* x, void* y, params), where params is an array
// parameter of union xnn_f16_tanh_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F16_VTANH_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const void* x, void* y, \
      const union xnn_f16_tanh_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vtanh "expm1minus_rr1_p3h2ts" micro-kernel prototypes for the f16c, fma3
// and avx2 tags; each tag has a _div and an _rcp flavor covering _u8.._u80.
// NOTE(review): div/rcp presumably select division vs. reciprocal, and _uN the
// unroll width - confirm against the kernel sources.

// Tag: f16c (div, then rcp)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_div_u80)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_u80)

// Tag: fma3 (div, then rcp)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_div_u80)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_expm1minus_rr1_p3h2ts_rcp_u80)

// Tag: avx2 (div, then rcp)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_div_u80)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__avx2_expm1minus_rr1_p3h2ts_rcp_u80)

// f16 vtanh "polynomial_p19h9t2" micro-kernel prototypes for the f16c and fma3
// tags, each covering _u8.._u80.
// NOTE(review): _uN presumably encodes the unroll width - confirm.

// Tag: f16c_polynomial_p19h9t2
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__f16c_polynomial_p19h9t2_u80)

// Tag: fma3_polynomial_p19h9t2
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_u80)

// f16 vtanh "expm1minus_rr1_p3h2ts" micro-kernel prototypes for the
// neonfp16arith tags, each group covering _u8.._u80.
// NOTE(review): div, nr1fma, nr1recps and recpeadj presumably name how the
// final division/reciprocal is computed, and _uN the unroll width - confirm
// against the kernel sources.

// Tag: aarch64_neonfp16arith, div flavor
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_u80)

// Tag: neonfp16arith, nr1fma flavor
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_u80)

// Tag: neonfp16arith, nr1recps flavor
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1recps_u80)

// Tag: neonfp16arith, recpeadj flavor
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u8)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u16)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u24)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u32)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u40)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u48)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u56)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u64)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u72)
DECLARE_F16_VTANH_UKERNEL_FUNCTION(xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_recpeadj_u80)


// Emits the prototype of one f32 vabs micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const float* x, float* y, params), where params is an array
// parameter of union xnn_f32_abs_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F32_VABS_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const float* x, float* y, \
      const union xnn_f32_abs_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vabs micro-kernel prototypes, grouped by the implementation tag embedded
// in each name.
// NOTE(review): the trailing _uN / _uNv presumably encodes the unroll width
// (elements or vector registers) - confirm.

// Tag: neon
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__neon_u4)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__neon_u8)

// Tag: rvv
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__rvv_u1v)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__rvv_u2v)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__rvv_u4v)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__rvv_u8v)

// Tag: sse
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__sse_u4)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__sse_u8)

// Tag: avx
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx_u8)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx_u16)

// Tag: avx512f
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx512f_u16)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__avx512f_u32)

// Tag: wasmsimd
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__wasmsimd_u4)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__wasmsimd_u8)

// Tag: scalar
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_u1)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_u2)
DECLARE_F32_VABS_UKERNEL_FUNCTION(xnn_f32_vabs_ukernel__scalar_u4)


// Emits the prototype of one f32 vclamp micro-kernel.
//   ukernel - identifier of the function to declare.
// The declared function is XNN_INTERNAL, returns void and takes
// (size_t n, const float* x, float* y, params), where params is an array
// parameter of union xnn_f32_minmax_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends with ';', so uses of this macro take no
// trailing semicolon.
// NOTE(review): n/x/y look like length/input/output; the unit of n is not
// visible in this header - confirm against the kernel sources.
#define DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const float* x, float* y, \
      const union xnn_f32_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vclamp micro-kernel prototypes, grouped by the implementation tag
// embedded in each name.
// NOTE(review): the trailing _uN / _uNv presumably encodes the unroll width
// (elements or vector registers) - confirm.

// Tag: neon
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__neon_u4)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__neon_u8)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__neon_u16)

// Tag: rvv
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__rvv_u1v)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__rvv_u2v)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__rvv_u4v)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__rvv_u8v)

// Tag: sse
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__sse_u4)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__sse_u8)

// Tag: avx
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx_u8)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx_u16)

// Tag: avx512f
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx512f_u16)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__avx512f_u32)

// Tag: wasm
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_u1)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_u2)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasm_u4)

// Tags: wasmsimd_arm and wasmsimd_x86
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_arm_u4)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_arm_u8)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_x86_u4)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__wasmsimd_x86_u8)

// Tag: scalar
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_u1)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_u2)
DECLARE_F32_VCLAMP_UKERNEL_FUNCTION(xnn_f32_vclamp_ukernel__scalar_u4)


// Declares the prototype shared by the `xnn_f32_velu_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_elu_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VELU_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                       \
      size_t n,                                    \
      const float* x,                              \
      float* y,                                    \
      const union xnn_f32_elu_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 velu prototypes tagged `neon` (rr2) and `neonfma` (rr1). Each tag has a
// `lut16_p3` and a `p6` family, with suffixes `_u4` through `_u24` in steps
// of 4.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neon_rr2_p6_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__neonfma_rr1_p6_u24)

// f32 velu prototypes tagged `sse2` and `sse41` (both rr2). Each tag has a
// `lut16_p3` and a `p6` family, with suffixes `_u4` through `_u24` in steps
// of 4.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse2_rr2_p6_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__sse41_rr2_p6_u24)

// f32 velu prototypes tagged `avx` (rr2): `lut4_p4_perm`, `lut16_p3` and `p6`
// families, with suffixes `_u8` through `_u48` in steps of 8.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_u48)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_u48)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx_rr2_p6_u48)

// f32 velu prototypes tagged `avx2` (rr1): `lut4_p4_perm`, `lut8_p4_perm`,
// `lut16_p3_gather` and `p6` families, with suffixes `_u8` through `_u80` in
// steps of 8.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u56)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u72)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u80)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u56)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u72)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_u80)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u56)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u72)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_u80)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u24)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u40)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u56)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u72)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx2_rr1_p6_u80)

// f32 velu prototypes tagged `avx512f` (rr1): `lut16_p3_perm` and `p6`
// families, with suffixes `_u16` through `_u128` in steps of 16.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u80)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u96)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u112)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_u128)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u32)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u48)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u64)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u80)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u96)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u112)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__avx512f_rr1_p6_u128)

// f32 velu prototypes tagged `wasmsimd_arm`, `wasmsimd_x86`, `wasmrelaxedsimd`
// and `wasmrelaxedsimd_fma` (all rr2). The four `lut16_p3` groups come first,
// then the four `p6` groups; every group has suffixes `_u4` through `_u24` in
// steps of 4.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_lut16_p3_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_rr2_p6_u24)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u8)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u12)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u16)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u20)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_u24)

// f32 velu prototypes tagged `wasm` and `scalar` (both rr2). Each tag has a
// `lut16_p3` and a `p6` family, with suffixes `_u1` through `_u6`.
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u1)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u2)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u3)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u5)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_u6)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u1)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u2)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u3)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u5)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__wasm_rr2_p6_u6)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u1)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u2)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u3)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u5)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_u6)

DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u1)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u2)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u3)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u4)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u5)
DECLARE_F32_VELU_UKERNEL_FUNCTION(xnn_f32_velu_ukernel__scalar_rr2_p6_u6)


// Declares the prototype shared by the `xnn_f32_vhswish_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_hswish_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                          \
      size_t n,                                       \
      const float* x,                                 \
      float* y,                                       \
      const union xnn_f32_hswish_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vhswish microkernel prototypes. Blank lines separate groups by the tag
// that follows `ukernel__` in each name: neon, rvv, sse, avx, fma3, avx512f,
// wasmsimd, wasm, scalar.
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_u4)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_u8)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__neon_u16)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__rvv_u1v)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__rvv_u2v)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__rvv_u4v)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__rvv_u8v)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__sse_u4)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__sse_u8)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx_u8)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx_u16)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__fma3_u8)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__fma3_u16)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx512f_u16)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__avx512f_u32)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_u4)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_u8)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasmsimd_u16)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_u1)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_u2)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__wasm_u4)

DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_u1)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_u2)
DECLARE_F32_VHSWISH_UKERNEL_FUNCTION(xnn_f32_vhswish_ukernel__scalar_u4)


// Declares the prototype shared by the `xnn_f16_vlrelu_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const void (presumably the f16 input; the element
//            type is not expressed in the signature).
//   y      - pointer to void (presumably the f16 output).
//   params - array parameter of `const union xnn_f16_lrelu_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F16_VLRELU_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                         \
      size_t n,                                      \
      const void* x,                                 \
      void* y,                                       \
      const union xnn_f16_lrelu_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f16 vlrelu microkernel prototypes: `neonfp16arith` and `f16c` tags, each
// with `_u8` and `_u16` suffixes.
DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__neonfp16arith_u8)
DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__neonfp16arith_u16)

DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__f16c_u8)
DECLARE_F16_VLRELU_UKERNEL_FUNCTION(xnn_f16_vlrelu_ukernel__f16c_u16)


// Declares the prototype shared by the `xnn_f32_vlrelu_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_lrelu_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VLRELU_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                         \
      size_t n,                                      \
      const float* x,                                \
      float* y,                                      \
      const union xnn_f32_lrelu_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);


// f32 vlrelu microkernel prototypes. Blank lines separate groups by the tag
// that follows `ukernel__` in each name: neon, sse, sse2, sse41, avx, avx512f,
// wasmsimd_laneselect, wasmsimd_iminmax, wasmrelaxedsimd_laneselect,
// wasmrelaxedsimd_iminmax, wasm, scalar.
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__neon_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__neon_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse2_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse2_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse41_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__sse41_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx_u8)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx_u16)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx512f_u16)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__avx512f_u32)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_laneselect_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_laneselect_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_iminmax_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmsimd_iminmax_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_laneselect_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_laneselect_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_iminmax_u4)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_iminmax_u8)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_u1)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_u2)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__wasm_u4)

DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_u1)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_u2)
DECLARE_F32_VLRELU_UKERNEL_FUNCTION(xnn_f32_vlrelu_ukernel__scalar_u4)


// Declares the prototype shared by the `xnn_f32_vneg_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_neg_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VNEG_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                       \
      size_t n,                                    \
      const float* x,                              \
      float* y,                                    \
      const union xnn_f32_neg_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);


// f32 vneg microkernel prototypes. Blank lines separate groups by the tag
// that follows `ukernel__` in each name: neon, rvv, sse, avx, avx512f,
// wasmsimd, scalar.
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__neon_u4)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__neon_u8)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u1v)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u2v)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u4v)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__rvv_u8v)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__sse_u4)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__sse_u8)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx_u8)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx_u16)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx512f_u16)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__avx512f_u32)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__wasmsimd_u4)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__wasmsimd_u8)

DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_u1)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_u2)
DECLARE_F32_VNEG_UKERNEL_FUNCTION(xnn_f32_vneg_ukernel__scalar_u4)


// Declares the prototype shared by the `xnn_f32_vrelu_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_relu_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VRELU_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                        \
      size_t n,                                     \
      const float* x,                               \
      float* y,                                     \
      const union xnn_f32_relu_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vrelu microkernel prototypes, listed as one run without blank-line
// grouping. Tags present: avx, avx512f, neon, scalar, sse, wasm, wasmsimd,
// wasm32_shr.
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx_u16)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx512f_u16)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__avx512f_u32)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__neon_u4)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__neon_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_u1)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_u2)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_u4)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__scalar_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__sse_u4)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__sse_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_u1)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_u2)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_u4)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_u4)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_u8)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasmsimd_u16)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_u1)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_u2)
DECLARE_F32_VRELU_UKERNEL_FUNCTION(xnn_f32_vrelu_ukernel__wasm32_shr_u4)

// Declares the prototype shared by the `xnn_f32_vrndne`, `vrndz`, `vrndu` and
// `vrndd` microkernel functions; all four families use this one macro.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_rnd_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VRND_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                       \
      size_t n,                                    \
      const float* x,                              \
      float* y,                                    \
      const union xnn_f32_rnd_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// `vrndne` prototypes.
// NOTE(review): `ne` presumably denotes round-to-nearest-even; confirm
// against the kernel sources.
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx512f_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__avx512f_u32)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neon_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neon_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neonv8_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__neonv8_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_u1)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_u2)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__scalar_libm_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse2_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse2_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse41_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__sse41_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__wasmsimd_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndne_ukernel__wasmsimd_u8)

// `vrndz` prototypes.
// NOTE(review): `z` presumably denotes rounding toward zero; confirm against
// the kernel sources.
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx512f_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__avx512f_u32)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neon_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neon_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neonv8_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__neonv8_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_u1)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_u2)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__scalar_libm_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse2_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse2_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse41_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__sse41_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__wasmsimd_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndz_ukernel__wasmsimd_u8)

// `vrndu` prototypes.
// NOTE(review): `u` presumably denotes rounding up; confirm against the
// kernel sources.
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx512f_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__avx512f_u32)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neon_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neon_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neonv8_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__neonv8_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_u1)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_u2)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__scalar_libm_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse2_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse2_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse41_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__sse41_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__wasmsimd_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndu_ukernel__wasmsimd_u8)

// `vrndd` prototypes.
// NOTE(review): `d` presumably denotes rounding down; confirm against the
// kernel sources.
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx512f_u16)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__avx512f_u32)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neon_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neon_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neonv8_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__neonv8_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_u1)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_u2)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__scalar_libm_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse2_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse2_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse41_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__sse41_u8)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__wasmsimd_u4)
DECLARE_F32_VRND_UKERNEL_FUNCTION(xnn_f32_vrndd_ukernel__wasmsimd_u8)


// Declares the prototype shared by the `xnn_f32_vsqrt_ukernel__*` functions.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with these parameters:
//   n      - size_t size argument. NOTE(review): the unit (bytes vs. elements)
//            is not visible in this header; confirm against the kernels.
//   x      - pointer to const float (presumably the input).
//   y      - pointer to float (presumably the output).
//   params - array parameter of `const union xnn_f32_sqrt_params`, declared
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// The expansion already ends in `;`, so invocations carry no trailing semicolon.
#define DECLARE_F32_VSQRT_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                        \
      size_t n,                                     \
      const float* x,                               \
      float* y,                                     \
      const union xnn_f32_sqrt_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vsqrt microkernel prototypes. Blank lines separate groups by the tag
// that follows `ukernel__` in each name: aarch64_neon_sqrt,
// neonfma_nr1rsqrts1fma1adj, neonfma_nr2fma1adj, rvv_sqrt, sse_sqrt, avx_sqrt,
// fma3_nr1fma1adj, avx512f_nr1fma1adj, wasmsimd_sqrt, scalar_sqrt.
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__aarch64_neon_sqrt_u4)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__aarch64_neon_sqrt_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__aarch64_neon_sqrt_u16)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_u4)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_u16)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_u4)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_u16)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__rvv_sqrt_u1v)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__rvv_sqrt_u2v)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__rvv_sqrt_u4v)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__rvv_sqrt_u8v)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__sse_sqrt_u4)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__sse_sqrt_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__sse_sqrt_u16)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx_sqrt_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx_sqrt_u16)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx_sqrt_u32)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_u16)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_u32)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_u16)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_u32)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_u64)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_u4)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_u8)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_u16)

DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_u1)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_u2)
DECLARE_F32_VSQRT_UKERNEL_FUNCTION(xnn_f32_vsqrt_ukernel__scalar_sqrt_u4)

// Prototype generator for the f32 "vrsqrt" microkernels.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with the parameters:
//   n      - size_t
//   x      - const float*
//   y      - float*
//   params - const union xnn_f32_rsqrt_params array parameter, qualified with
//            XNN_RESTRICT XNN_MIN_ELEMENTS(1)
//
// NOTE(review): x/y are presumably the input/output buffers and n the batch
// size; whether n counts bytes or elements is not visible in this header -
// confirm against a kernel implementation.
//
// The expansion already ends with ';', so invocations below are written
// without a trailing semicolon.
#define DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                         \
      size_t n,                                      \
      const float* x,                                \
      float* y,                                      \
      const union xnn_f32_rsqrt_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// vrsqrt kernels tagged `scalar_recip_sqrt`, one declaration per _uN suffix.
// NOTE(review): the _uN suffix presumably encodes the unroll factor - confirm.
DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(xnn_f32_vrsqrt_ukernel__scalar_recip_sqrt_u1)
DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(xnn_f32_vrsqrt_ukernel__scalar_recip_sqrt_u2)
DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(xnn_f32_vrsqrt_ukernel__scalar_recip_sqrt_u4)
DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(xnn_f32_vrsqrt_ukernel__scalar_recip_sqrt_u8)
DECLARE_F32_VRSQRT_UKERNEL_FUNCTION(xnn_f32_vrsqrt_ukernel__scalar_recip_sqrt_u16)

// Prototype generator for the f32 "vsigmoid" microkernels.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with the parameters:
//   n      - size_t
//   x      - const float*
//   y      - float*
//   params - const union xnn_f32_sigmoid_params array parameter, qualified
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1)
//
// NOTE(review): x/y are presumably the input/output buffers and n the batch
// size; whether n counts bytes or elements is not visible in this header -
// confirm against a kernel implementation.
//
// The expansion already ends with ';', so each invocation is written without
// a trailing semicolon.
#define DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                           \
      size_t n,                                        \
      const float* x,                                  \
      float* y,                                        \
      const union xnn_f32_sigmoid_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// vsigmoid kernels tagged `aarch64_neonfma`, `neonfma` and `neon`.
// Each blank-line-separated group shares one name stem and lists the
// _u4 ... _u24 variants of it.
// NOTE(review): the leading tag presumably names the target instruction set,
// the middle part (rr*/p*/lut*/div/nr*) the algorithm variant, and _uN the
// unroll factor - confirm against the kernel sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut64_p2_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__aarch64_neonfma_rr1_lut2048_p1_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_u24)

// vsigmoid kernels tagged `sse2` and `sse41`: the `rr2_p5_div` and
// `rr2_lut64_p2_div` stems, each in _u4 ... _u24 variants.
// NOTE(review): the leading tag presumably names the target instruction set
// and _uN the unroll factor - confirm against the kernel sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_u24)

// vsigmoid kernels tagged `avx` (rr2_p5 with `div` / `nr2`) and `avx2`
// (rr1_p5 with `div` / `nr1fma` / `nr2fma`), each in _u8 ... _u80 variants.
// NOTE(review): the leading tag presumably names the target instruction set,
// the trailing div/nr* part the reciprocal strategy, and _uN the unroll
// factor - confirm against the kernel sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u24)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u40)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u56)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u72)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_u80)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u24)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u40)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u56)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u72)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_u80)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u24)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u40)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u56)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u72)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u80)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u24)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u40)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u56)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u72)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_u80)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u24)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u40)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u56)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u72)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_u80)

// vsigmoid kernels tagged `avx512f`: the `rr1_p5_scalef`,
// `rr1_lut16_p3_perm_scalef` and `rr2_lut32_p2_perm2_scalef` stems, each with
// a `div` and an `nr1fma` ending, in _u16 ... _u128 variants.
// NOTE(review): the leading tag presumably names the target instruction set
// and _uN the unroll factor - confirm against the kernel sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_u128)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_u128)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_u128)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_u128)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u128)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u32)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u48)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u64)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u80)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u96)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u112)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_u128)

// vsigmoid kernels tagged `wasmsimd`, `wasmrelaxedsimd` and `wasmblendvps`
// (the latter two also with an extra `fma` tag), using the `rr2_p5_div` and
// `rr2_lut64_p2_div` stems in _u4 ... _u24 variants.
// NOTE(review): the leading tags presumably name the target WebAssembly
// feature set and _uN the unroll factor - confirm against the kernel sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_rr2_lut64_p2_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmblendvps_fma_rr2_p5_div_u24)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u4)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u8)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u12)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u16)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u20)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_lut64_p2_div_u24)

// vsigmoid kernels tagged `scalar`: the `rr2_lut2048_p1_div`,
// `rr2_lut64_p2_div` and `rr2_p5_div` stems, each in _u1, _u2 and _u4 variants.
// NOTE(review): `scalar` presumably marks the portable non-SIMD
// implementations and _uN the unroll factor - confirm against the kernel
// sources.
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_u1)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_u2)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_u4)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_u1)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_u2)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_u4)

DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_u1)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_u2)
DECLARE_F32_VSIGMOID_UKERNEL_FUNCTION(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_u4)


// Prototype generator for the f32 "vsqr" microkernels.
//
// Expands to an XNN_INTERNAL declaration of a void function named `fn_name`
// with the parameters:
//   n      - size_t
//   x      - const float*
//   y      - float*
//   params - const union xnn_f32_default_params array parameter, qualified
//            with XNN_RESTRICT XNN_MIN_ELEMENTS(1)
//
// NOTE(review): x/y are presumably the input/output buffers and n the batch
// size; whether n counts bytes or elements is not visible in this header -
// confirm against a kernel implementation.
//
// The expansion already ends with ';', so invocations below are written
// without a trailing semicolon.
#define DECLARE_F32_VSQR_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                       \
      size_t n,                                    \
      const float* x,                              \
      float* y,                                    \
      const union xnn_f32_default_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// One group of declarations per name tag (neon, rvv, sse, avx, avx512f,
// wasmsimd, scalar).
// NOTE(review): the tag presumably names the target instruction set and the
// _uN / _uNv suffix the unroll factor - confirm against the kernel sources.
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__neon_u4)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__neon_u8)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u1v)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u2v)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u4v)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__rvv_u8v)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__sse_u4)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__sse_u8)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx_u8)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx_u16)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx512f_u16)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__avx512f_u32)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__wasmsimd_u4)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__wasmsimd_u8)

DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_u1)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_u2)
DECLARE_F32_VSQR_UKERNEL_FUNCTION(xnn_f32_vsqr_ukernel__scalar_u4)


// Declares the prototype of one f32 "vtanh" ukernel.
//
// `ukernel_name` becomes the name of a void function, marked XNN_INTERNAL,
// that takes in order:
//   n      - a size_t,
//   x      - a pointer to const float,
//   y      - a pointer to float,
//   params - an array parameter of const union xnn_f32_tanh_params,
//            annotated with XNN_RESTRICT and XNN_MIN_ELEMENTS(1).
//
// The expansion already ends in ';', so invocations are written without a
// trailing semicolon.
// NOTE(review): the unit of `n` (bytes vs. elements) is not visible in this
// header - confirm against the kernel definitions.
#define DECLARE_F32_VTANH_UKERNEL_FUNCTION(ukernel_name)                 \
  XNN_INTERNAL void ukernel_name(size_t n,                               \
                                 const float* x,                         \
                                 float* y,                               \
                                 const union xnn_f32_tanh_params         \
                                     params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

// f32 vtanh prototypes for kernels whose names carry the `aarch64_neonfma`,
// `neon`, or `neonfma` tag. Within each tag the names vary by algorithm tag
// (`rr1_lut8_p4h3ts`, `rr1_p6h5ts`), by final-step tag (`div`, `nr2recps`,
// `nr1recps1fma`, `nr2fma`), and by `_uN` suffix (4, 8, 12, 16).
// NOTE(review): `_uN` presumably is the number of floats handled per
// main-loop iteration - confirm against the kernel sources.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_lut8_p4h3ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_lut8_p4h3ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_lut8_p4h3ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_lut8_p4h3ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_p6h5ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_p6h5ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neon_expm1minus_rr1_p6h5ts_nr2recps_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neon_expm1minus_rr1_p6h5ts_nr2recps_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neon_expm1minus_rr1_p6h5ts_nr2recps_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neon_expm1minus_rr1_p6h5ts_nr2recps_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr1recps1fma_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr1recps1fma_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr1recps1fma_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr1recps1fma_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr2fma_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr2fma_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr2fma_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_lut8_p4h3ts_nr2fma_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr1recps1fma_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr1recps1fma_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr1recps1fma_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr1recps1fma_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2fma_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2fma_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2fma_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2fma_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2recps_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2recps_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2recps_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2recps_u16)

// f32 vtanh prototypes for kernels tagged `fma`: the `rr1_lut8_p4h3ts_div`
// family first, then the `rr1_p6h5ts_div` family, each with `_u1`/`_u2`/`_u4`.
// NOTE(review): the `fma` tag (as opposed to `fma3`) presumably marks scalar
// code relying on fused multiply-add - confirm against the kernel sources.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_lut8_p4h3ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_lut8_p4h3ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_lut8_p4h3ts_div_u4)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_p6h5ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_p6h5ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_p6h5ts_div_u4)

// f32 vtanh prototypes for kernels tagged `scalar`: the `rr1_lut8_p4h3ts_div`
// family first, then the `rr1_p6h5ts_div` family, each with `_u1`/`_u2`/`_u4`.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_lut8_p4h3ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_lut8_p4h3ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_lut8_p4h3ts_div_u4)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_p6h5ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_p6h5ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_p6h5ts_div_u4)

// f32 vtanh prototypes for kernels tagged `wasm`: the `rr1_lut8_p4h3ts_div`
// family first, then the `rr1_p6h5ts_div` family, each with `_u1`/`_u2`/`_u4`.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_lut8_p4h3ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_lut8_p4h3ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_lut8_p4h3ts_div_u4)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_p6h5ts_div_u1)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_p6h5ts_div_u2)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_p6h5ts_div_u4)

// f32 vtanh prototypes for kernels tagged `sse2`. The `rr1_lut8_p4h3ts`
// family is declared with the `div` final step only; the `rr1_p6h5ts` family
// is declared with `div`, `nr1`, and `nr2`. `_uN` runs over 4, 8, 12, 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_lut8_p4h3ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_lut8_p4h3ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_lut8_p4h3ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_lut8_p4h3ts_div_u16)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr1_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr1_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr1_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr2_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr2_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr2_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_p6h5ts_nr2_u16)

// f32 vtanh prototypes for kernels tagged `sse41`. The `rr1_lut8_p4h3ts`
// family is declared with the `div` final step only; the `rr1_p6h5ts` family
// is declared with `div`, `nr1`, and `nr2`. `_uN` runs over 4 through 24 in
// steps of 4.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u20)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u24)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u20)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u20)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr1_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u20)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_p6h5ts_nr2_u24)

// f32 vtanh prototypes for kernels tagged `avx`. Families declared here, in
// order: `rr1_lut4_p4h2ts_perm_div`, `rr1_lut8_p4h3ts_div`, `rr1_p6h5ts_div`,
// `rr1_p6h5ts_nr1`, `rr1_p6h5ts_nr2`. `_uN` runs over 8 through 80 in steps of
// 8, except the `lut8` family, which stops at `_u32`.
// Note that the `lut4` family here is spelled `p4h2ts`, unlike the `p4h3ts`
// used by the `lut8` family.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut8_p4h3ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut8_p4h3ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut8_p4h3ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut8_p4h3ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr1_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_p6h5ts_nr2_u80)

// f32 vtanh prototypes for kernels tagged `fma3`. Families declared here, in
// order: `rr1_lut4_p4h3ts_perm_{div,nr1adj}`, `rr1_lut8_p4h3ts_{div,nr1adj}`,
// and `rr1_p6h5ts_{div,nr1,nr1adj}`. `_uN` runs over 8 through 80 in steps of
// 8, except the two `lut8` families, which stop at `_u32`.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut8_p4h3ts_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_p6h5ts_nr1adj_u80)

// f32 vtanh prototypes for kernels tagged `avx2`. Families declared here, in
// order: `rr1_lut4_p4h3ts_perm_{div,nr1adj}`,
// `rr1_lut8_p4h3ts_gather_{div,nr1adj}`, `rr1_lut8_p4h3ts_perm_{div,nr1adj}`,
// and `rr1_p6h5ts_{div,nr1,nr1adj}`. Every family is declared for `_uN` from
// 8 through 80 in steps of 8.
// NOTE(review): `gather` vs. `perm` presumably names how the lookup table is
// accessed - confirm against the kernel sources.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u24)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u40)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u56)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u72)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_p6h5ts_nr1adj_u80)

DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u160)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_nr1adj_u160)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_div_u160)
// avx512skx / expm1minus_rr1_lut8_p4h3ts_gather_nr1adj prototypes, suffixes u16 through u160 in steps of 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_gather_nr1adj_u160)
// avx512skx / expm1minus_rr1_lut8_p4h3ts_perm_div prototypes, suffixes u16 through u160 in steps of 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_div_u160)
// avx512skx / expm1minus_rr1_lut8_p4h3ts_perm_nr1adj prototypes, suffixes u16 through u160 in steps of 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut8_p4h3ts_perm_nr1adj_u160)
// avx512skx / expm1minus_rr1_p6h5ts_div prototypes (no lutN component in the name), suffixes u16 through u160 in steps of 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_div_u160)
// avx512skx / expm1minus_rr1_p6h5ts_nr1 prototypes, suffixes u16 through u160 in steps of 16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u32)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u48)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u64)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u80)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u96)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u112)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u128)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u144)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_p6h5ts_nr1_u160)

// wasmsimd / expm1minus_rr1_lut8_p4h3ts_div prototypes: four name variants
// (abs_min, abs_pmin, nabs_max, nabs_pmax), each with suffixes u4, u8, u12 and u16.
// NOTE(review): the uN suffix presumably encodes the elements processed per main-loop iteration - confirm against the kernel sources.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_min_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_min_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_min_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_min_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_pmin_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_pmin_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_pmin_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_abs_pmin_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_max_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_max_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_max_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_max_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_pmax_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_pmax_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_pmax_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_lut8_p4h3ts_div_nabs_pmax_u16)
// wasmsimd / expm1minus_rr1_p6h5ts_div prototypes: four name variants
// (abs_min, abs_pmin, nabs_max, nabs_pmax), each with suffixes u4, u8, u12 and u16.
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_min_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_min_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_min_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_min_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_pmin_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_pmin_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_pmin_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_pmin_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_max_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_max_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_max_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_max_u16)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_pmax_u4)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_pmax_u8)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_pmax_u12)
DECLARE_F32_VTANH_UKERNEL_FUNCTION(xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_pmax_u16)


// Emits the prototype of an S8 vclamp microkernel called `ukernel`.
// The generated function returns void and takes, in order: a size_t `n`,
// a read-only int8_t pointer `x`, a writable int8_t pointer `y`, and a
// `params` array of union xnn_s8_minmax_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// NOTE(review): `n` is presumably the batch size; whether it counts bytes or
// elements is not visible in this header - confirm against the kernels.
// The expansion already ends in ';', so each use below has no semicolon.
#define DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(                        \
      size_t n, const int8_t* x, int8_t* y,         \
      const union xnn_s8_minmax_params              \
          params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__neon_u64)
DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__scalar_u4)
DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__sse2_u64)
DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__sse41_u64)
DECLARE_S8_VCLAMP_UKERNEL_FUNCTION(xnn_s8_vclamp_ukernel__wasmsimd_u64)


// Emits the prototype of a U8 vclamp microkernel called `ukernel`.
// The generated function returns void and takes, in order: a size_t `n`,
// a read-only uint8_t pointer `x`, a writable uint8_t pointer `y`, and a
// `params` array of union xnn_u8_minmax_params qualified with
// XNN_RESTRICT XNN_MIN_ELEMENTS(1).
// NOTE(review): `n` is presumably the batch size; whether it counts bytes or
// elements is not visible in this header - confirm against the kernels.
// The expansion already ends in ';', so each use below has no semicolon.
#define DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(                        \
      size_t n, const uint8_t* x, uint8_t* y,       \
      const union xnn_u8_minmax_params              \
          params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__neon_u64)
DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__scalar_u4)
DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__sse2_u64)
DECLARE_U8_VCLAMP_UKERNEL_FUNCTION(xnn_u8_vclamp_ukernel__wasmsimd_u64)


// Emits the prototype of a U64 -> U32 vsqrtshift microkernel called `ukernel`.
// The generated function returns void and takes, in order: a size_t `n`,
// a read-only uint64_t pointer `x`, a writable uint32_t pointer `y`, and a
// uint32_t `shift` passed by value (this kernel family has no params struct).
// NOTE(review): the units of `n` and the exact role of `shift` are not
// visible in this header - confirm against the kernel implementation.
// The expansion already ends in ';', so the use below has no semicolon.
#define DECLARE_U64_U32_VSQRTSHIFT_UKERNEL_FUNCTION(ukernel) \
  XNN_INTERNAL void ukernel(size_t n, const uint64_t* x,     \
                            uint32_t* y, uint32_t shift);

DECLARE_U64_U32_VSQRTSHIFT_UKERNEL_FUNCTION(xnn_u64_u32_vsqrtshift_ukernel__scalar_cvtu32_sqrt_cvtu32f64_u1)


// Emits the prototype of a type-agnostic (XX) unary microkernel called
// `ukernel`. The generated function returns void and takes, in order: a
// size_t `size`, a read-only untyped `input` pointer, a writable untyped
// `output` pointer, and a read-only untyped `params` pointer.
// NOTE(review): `size` is presumably a byte count given the void pointers -
// confirm against the kernel implementation.
// The expansion already ends in ';', so the use below has no semicolon.
#define DECLARE_XX_VUNARY_UKERNEL_FUNCTION(ukernel)         \
  XNN_INTERNAL void ukernel(size_t size, const void* input, \
                            void* output, const void* params);

DECLARE_XX_VUNARY_UKERNEL_FUNCTION(xnn_xx_copy_ukernel__scalar_memcpy)

#ifdef __cplusplus
}  // extern "C"
#endif
